# Linear regression (LM) models with Jacobs (column `jacob`) as the dependent variable
# and herd size (categories; column `herd`) and body mass (kg; column `biomass`)
# as independent variables.

# Setup ----

# Attach the packages used further down before anything else runs: ggplot()
# and multcompLetters4() are called below, and attaching them only later in
# the script makes the first ggplot() call fail in a fresh session.
library(ggplot2)
library(multcompView)

# NOTE(review): a hard-coded working directory ties the script to one machine.
# It is kept so the relative path to the CSV below still resolves; consider an
# RStudio project or a path argument instead.
setwd("~/Desktop/Tuli thesis/Statistics")
getwd()

# For repeatability, seed the pseudo-random number generator.
set.seed(1234)

# Load the data set used by every analysis below.
DataLM <- read.csv(file = "RegSeason1.csv", header = TRUE, sep = ",")

# Exploratory scatter plots: jacob against each candidate predictor in turn.
# The axis labels are simply the column names.
for (predictor in c("herd", "biomass")) {
  plot(
    reformulate(predictor, response = "jacob"),
    data = DataLM,
    pch = 16,
    xlab = predictor,
    ylab = "jacob"
  )
}

# Check correlation! ----
# Pairwise Pearson correlation tests (95% confidence interval) between all
# candidate variables. combn() enumerates every unordered pair exactly once,
# in the order biomass, availability, herd.size, herd, season, so no pair can
# be forgotten or duplicated when the variable list changes.
cor_vars <- c("biomass", "availability", "herd.size", "herd", "season")

for (pair in combn(cor_vars, 2, simplify = FALSE)) {
  # Build the one-sided formula `~ a + b` expected by cor.test().
  pair_formula <- reformulate(pair)

  # Explicit print(): results inside a loop are not auto-printed, and this
  # also makes them visible when the script is run through source().
  print(
    cor.test(
      pair_formula,
      data = DataLM,
      method = "pearson",
      conf.level = 0.95
    )
  )
}

# Variable herd.size ----
# Decided not to use herd.size; the categorical variable herd is used instead.
# The plots are kept for reference.

# Base scatter plot of jacob against herd.size.
plot(jacob ~ herd.size, data = DataLM)

# Same relationship with a cubic polynomial trend fitted by lm().
ggplot(data = DataLM, mapping = aes(x = herd.size, y = jacob)) +
  geom_point(size = 2, na.rm = TRUE) +
  geom_smooth(
    formula = y ~ poly(x, 3),
    method = "lm",
    colour = "red"
  ) +
  theme_classic()

# Variable herd (in categories) ----
# LMherd: linear model of jacob on herd category, followed by an ANOVA table,
# Tukey HSD post-hoc comparisons and compact letter display.

# Attach packages before their first use in this section.
library(ggplot2)
library(multcompView)

# jacob per herd category (herd is treated as a factor).
plot(jacob ~ factor(herd), data = DataLM)

# Points per category with a quadratic trend across the categories.
# aes(group = 1): with a discrete x, ggplot2 groups by factor level by default,
# which leaves the smoother a single x value per group; one common group lets
# the polynomial be fitted across categories.
# NOTE(review): poly(x, 2) needs at least three herd categories - confirm.
ggplot(DataLM, aes(x = factor(herd), y = jacob)) +
  geom_point(na.rm = TRUE, size = 2) +
  geom_smooth(
    aes(group = 1),
    method = glm,
    formula = y ~ poly(x, 2),
    na.rm = TRUE,
    colour = "red",
    fill = "grey"
  ) +
  theme_classic()

# Fit the model once. The plot above previously ended in a dangling `+`, which
# pulled this assignment into the plot expression.
LMherd <- lm(jacob ~ factor(herd), data = DataLM)
summary(LMherd)

# ANOVA table for the fitted model.
herd.av <- aov(LMherd)
summary(herd.av)

# Pairwise post-hoc comparisons between herd categories.
tukey <- TukeyHSD(herd.av)
tukey

# Compact letter display derived from the Tukey results.
cld <- multcompLetters4(herd.av, tukey)
print(cld)

# Variable body mass (biomass) ----

# Base scatter plot of jacob against biomass.
plot(jacob ~ biomass, data = DataLM)

# LMbiomass: simple linear model of jacob on biomass.
LMbiomass <- lm(jacob ~ biomass, data = DataLM)
summary(LMbiomass)

# Scatter plot with a quadratic polynomial trend fitted by lm().
ggplot(data = DataLM, mapping = aes(x = biomass, y = jacob)) +
  geom_point(size = 2, na.rm = TRUE) +
  geom_smooth(
    formula = y ~ poly(x, 2),
    method = "lm",
    colour = "red"
  ) +
  theme_classic()


